/* <--- %%NOBANNER%% --> regress.sas */

/*------------------<--- Start of Description -->--------------------\
| PERFORM POLYNOMIAL REGRESSION ANALYSIS ON XVAR WITH THE OPTION OF  |
| FIRST ELIMINATING OUTLIERS. PROVIDE DESCRIPTIVE STATISTICS ON XVAR |
| AND YVAR, AND PERFORM TESTS OF MODELING ASSUMPTIONS.               |
|--------------------<--- End of Description -->---------------------|
|--------------------------------------------------------------------|
|--------------<--- Start of Files or Arguments Needed -->-----------|
|  PARAMETERS :  DSN  - INPUT DATA SET NAME.                         |
|                OPT  - OUTLIER ELIMINATION OPTION.                  |
|                          OFFORD - FLAG OUTLIERS BUT INCLUDE IN     |
|                                   ANALYSIS.                        |
|                          OBRIEN - FLAG OUTLIERS AND EXCLUDE FROM   |
|                                   ANALYSIS (CAUTION: MAY NOT BE    |
|                                   APPROPRIATE WITH 'SMALL'         |
|                                   DATASETS).                       |
|                XVAR - NAME OF X VARIABLE.                          |
|                YVAR - NAME OF Y VARIABLE.                          |
|                ID   - NAME OF ID VARIABLE (I.E. CLINIC).           |
|                C1   - X INTERVAL CUT-OFF POINTS (I.E. 5 10 15).    |
|                       IF OMITTED, MACRO WILL CHOOSE 4 INTERVALS    |
|                       BASED ON RANGE OF X VALUES.                  |
|                                                                    |
|  PROCESSING :  OUTLIERS - A VALUE IS FLAGGED AS AN OUTLIER IF      |
|                  IT IS MORE THAN ONE STANDARD DEVIATION FROM THE   |
|                  NEXT HIGHEST (LOWEST) VALUE.  (AT MOST 5 VALUES   |
|                  ARE CHECKED.)                                     |
|                                                                    |
|  OUTPUT     :  PAGE 1 -                                            |
|                 DESCRIPTIVE STATISTICS -                           |
|                  EXTREME VALUES WITH OUTLIERS FLAGGED.  MEANS &    |
|                  STD DEVS ARE PROVIDED FOR EACH X INTERVAL AND     |
|                  OVERALL.  P-VALUES FOR ANOVA & SPEARMAN'S TESTS.  |
|                 POLYNOMIAL REGRESSION ANALYSIS -                   |
|                  POLYNOMIAL COEFFICIENTS FOR LINEAR THRU QUINTIC   |
|                  MODELS ALONG WITH R-SQUARE AND P VALUES FOR       |
|                  MODEL COMPARISONS.                                |
|                 TESTS OF MODELING ASSUMPTIONS -                    |
|                  SKEWNESS, KURTOSIS, NORMALITY, SPEARMAN AND       |
|                  LEVENE TESTS ARE PERFORMED ON EACH OF THE MODELS. |
|                PAGE 2 -                                            |
|                  PLOT OF Y VS. X AND MEAN-Y VS. MEAN-X.            |
|                                                                    |
|  ERROR MSG  :  MESSAGES ARE OUTPUT IF AN INPUT PARAMETER IS        |
|                MISSING OR OUT OF RANGE.                            |
|---------------<--- End of Files or Arguments Needed -->------------|
|--------------------------------------------------------------------|
|----------------<--- Start of Example and Usage -->-----------------|
| Usage: %REGRESS(DSN,OPT,XVAR,YVAR,ID,C1);                          |
\-------------------<--- End of Example and Usage -->---------------*/
%MACRO REGRESS(DSN,OPT,XVAR,YVAR,ID,C1);
/*--------------------------------------------\
| Author:  C. D. STERTZ & S. L. DAOOD;        |
| Created: 8/26/88                            |
| Purpose: POLYNOMIAL REGRESSION ANALYSIS;    |
\--------------------------------------------*/
/*--------------------------------------------------------------------\
| Parameter validation.  Each check writes a message to the log and   |
| branches to the ENDUP label at the end of the macro when it fails.  |
|   DSN, OPT, XVAR, YVAR, ID must be non-blank.                       |
|   OPT must be OBRIEN or OFFORD; it is upper-cased first so that     |
|   lower or mixed case spellings are accepted as well.               |
|   C1 is optional and is handled further down.                       |
| _TIT is set to a quoted title fragment when OPT is OBRIEN and is    |
| left blank otherwise.                                               |
\--------------------------------------------------------------------*/
     %GLOBAL QUIT I N4 NOBSERV;
     OPTIONS DQUOTE;
     %IF &DSN=  %THEN %DO;
         %PUT "ERROR: NO DATASET NAME WAS SUPPLIED IN PARM LIST.";
         %GO TO ENDUP;
     %END;
     %IF &OPT=  %THEN %DO;
         %PUT "ERROR: NO OPTION (OBRIEN/OFFORD) WAS SUPPLIED.";
         %GO TO ENDUP;
     %END;
     /* Normalise case once; later tests of OPT compare against upper case. */
     %LET OPT=%UPCASE(&OPT);
     %IF ^(&OPT=OBRIEN | &OPT=OFFORD) %THEN %DO;
         %PUT "ERROR: OPTION MUST BE OBRIEN OR OFFORD.";
         %GO TO ENDUP;
     %END;
     %IF &XVAR=   %THEN %DO;
         %PUT "ERROR: NO X VARIABLE SUPPLIED";
         %GO TO ENDUP;
     %END;
     %IF &YVAR=   %THEN %DO;
         %PUT "ERROR: NO Y VARIABLE SUPPLIED";
         %GO TO ENDUP;
     %END;
     /* ID is assigned with ID = (value of ID) later on, so a blank ID   */
     /* would generate an invalid statement; report it up front instead. */
     %IF &ID=   %THEN %DO;
         %PUT "ERROR: NO ID VARIABLE SUPPLIED";
         %GO TO ENDUP;
     %END;
     %LET _TIT=    ;
     %IF &OPT=OBRIEN %THEN %LET _TIT="OUTLIERS ELIMINATED";
/* Copy the input data set to _DSN keeping only the ID, X and Y        */
/* variables, drop every observation whose X or Y is missing (.), and  */
/* store the number of dropped observations in macro variable _DELS.   */
     DATA _DSN;
        SET &DSN END=EOF;
        KEEP &ID &XVAR &YVAR;
        /* Sum statement: DEL_CNT starts at 0 and is retained across rows. */
        IF (&XVAR=. | &YVAR=.)  THEN DEL_CNT + 1;
        /* Publish the count before the DELETE below so that it is still   */
        /* written when the last observation itself is dropped.            */
        IF (EOF)  THEN CALL SYMPUT('_DELS',PUT(DEL_CNT,3.));
        IF (&XVAR=. | &YVAR=.)  THEN DELETE;

     /* No cut-off points supplied: derive three cut-offs that split the   */
     /* observed X range into four intervals and store them, blank         */
     /* separated and zero padded to five digits, in macro variable C1.    */
     %IF &C1=   %THEN %DO;
         %LET NUM=4;
         DATA _NULL_; SET _DSN END=EOF;
            RETAIN BOTTOM TOP;
            /* Seed the running minimum and maximum with the first X value. */
            IF (_N_ =1)  THEN DO;
               BOTTOM=&XVAR; TOP=&XVAR;
            END;
            /* Track the smallest and largest X seen so far. */
            IF (&XVAR<BOTTOM)  THEN BOTTOM=&XVAR;
            IF (&XVAR>TOP)  THEN TOP=&XVAR;
            IF (EOF)  THEN DO;
               INTERVAL=(TOP - BOTTOM)/4;
               /* Each cut-off is rounded up to a whole number. */
               T1=CEIL(BOTTOM + INTERVAL);
               T2=CEIL(T1 + INTERVAL);
               T3=CEIL(T2 + INTERVAL);
               TT1='     '; TT2='     ';   TT3='     ';
               TT1=PUT(T1,Z5.);
               TT2=PUT(T2,Z5.);
               TT3=PUT(T3,Z5.);
               NEWVAL='                    ';
               NEWVAL=TT1 || ' ' || TT2 || ' ' || TT3;
               CALL SYMPUT('C1',NEWVAL);
            END;
         /* Run the step now so that C1 is assigned before it is referenced. */
         RUN;
     %END;

     /* End the preceding step first: when the cut-offs were derived by   */
     /* the macro, C1 is assigned through CALL SYMPUT and is only         */
     /* available once that step has executed.                            */
     RUN;

     /* Validate the cut-off list at macro time, before any DATA step     */
     /* code is generated, so that a bad list does not leave a partly     */
     /* built step behind.  Each cut-off must be greater than the one     */
     /* before it.                                                        */
     /* NOTE(review): the comparison is an implicit integer evaluation;   */
     /* confirm how non-integer cut-offs should be treated.               */
     %LOCAL _CK _LO _HI;
     %LET _CK=1;
     %LET _LO=%SCAN(&C1,1,' ');
     %LET _HI=%SCAN(&C1,2,' ');
     %DO %WHILE(&_HI^= );
         %IF &_HI<=&_LO %THEN %DO;
             %PUT "ERROR: NON-INCREASING INTERVALS IN PARM LIST";
             %GO TO ENDUP;
         %END;
         %LET _CK=%EVAL(&_CK+1);
         %LET _LO=&_HI;
         %LET _HI=%SCAN(&C1,&_CK+1,' ');
     %END;

     /* Build the analysis data set:                                      */
     /*   X, Y      copies of the X and Y variables                       */
     /*   X2-X5     powers of X                                           */
     /*   GROUP     text label of the X interval the row falls in         */
     /*   WEIGH     0 below the first cut-off, 1 for interior intervals,  */
     /*             2 at or above the last cut-off                        */
     /* The macro loop below emits one IF block per interval.  On exit,   */
     /* NUM holds the number of intervals and QUIT the number of rows.    */
     /* NOTE(review): GROUP is 12 characters long, so labels built from   */
     /* two five-digit cut-offs are truncated.                            */
     DATA _DSN(KEEP=&ID GROUP WEIGH X X2 X3 X4 X5 Y);
            SET _DSN END=EOF;
       RETAIN COUNT 0;
       GROUP="            ";
       %LET BE=X;
       %LET I=1;
       X  = &XVAR;
       X2 = X*X;
       X3 = X2*X;
       X4 = X3*X;
       X5 = X4*X;
       Y  = &YVAR;
       COUNT = COUNT + 1;
       %DO %UNTIL(&BE= );
            /* BI is the current cut-off, BE the next one (blank at the end). */
            %LET BI=%SCAN(&C1,&I,' ');
            %LET BE=%SCAN(&C1,&I+1,' ');
            %IF &I=1 %THEN %DO;
               IF &XVAR <&BI THEN DO;
                 GROUP="    X<&BI";
                 WEIGH=0;
               END;
            %END;
            %IF &BE^=  %THEN %DO;
               IF &BI <= &XVAR < &BE THEN DO;
                 GROUP="&BI<=X<&BE";
                 WEIGH=1;
               END;
            %END;
            %ELSE %DO;
               IF &XVAR>=&BI THEN DO;
                 GROUP="&BI<=X    ";
                 WEIGH=2;
               END;
            %END;
            %LET I=%EVAL(&I+1);
       %END;
     OUTPUT _DSN;
     %LET NUM=%EVAL(&I);
     IF EOF THEN DO;
       CC = PUT(COUNT,6.);
       CALL SYMPUT('QUIT',CC);
     END;

  /* Add one 0/1 indicator variable per X interval (G1-G&NUM) to _DSN and */
  /* store the observation count, without leading zeros, in NOBSERV.      */
  /* QUIT is assigned by CALL SYMPUT in the preceding step; the %IF that  */
  /* tests it is placed after the DATA statement below.  The %DO opened   */
  /* by that %IF is closed near the end of the macro.                     */
  /* This step has no KEEP or DROP, so all work variables (COUNT, NN, I,  */
  /* INDEX, NNN) are written to _DSN as well.                             */
  DATA _DSN; SET _DSN END=EOF;
   %IF &QUIT>=6 %THEN %DO;

     RETAIN COUNT 0;
     COUNT = COUNT + 1;
     ID = &ID;
     /* Initialise every indicator to 0. */
     %LET I=1;
     %DO II=1 %TO &NUM;
        G&I=0;
        %LET I=%EVAL(&I+1);
     %END;
     /* Walk the cut-off list; I indexes the cut-off, II the indicator. */
     %LET BE=X;
     %LET I=1;
       %DO %UNTIL(&BE= );
            %LET BI=%SCAN(&C1,&I,' ');
            %LET BE=%SCAN(&C1,&I+1,' ');
            %IF &I=1 %THEN %DO;
                %LET II=1;
                IF X < &BI THEN G&II=1;
            %END;
            %LET II=%EVAL(&II+1);
            %IF &BE^=  %THEN %DO;
                /* A cut-off that does not exceed its predecessor is an error. */
                %IF &BE<=&BI %THEN %DO;
                   PUT "ERROR: NON-INCREASING INTERVALS IN PARM LIST";
                   %GO TO ENDUP;
                %END;
                IF &BI <= X < &BE THEN G&II=1;
            %END;
            %ELSE %DO;
                IF X >= &BI THEN G&II=1;
            %END;
            %LET I=%EVAL(&I+1);
       %END;
       IF EOF THEN DO;
         /* Format the count with leading zeros, find the first non-zero */
         /* digit, and keep the text from that position onward.          */
         NN = PUT(COUNT,Z5.);
         DO I=1 TO 5;
            IF (SUBSTR(NN,I,1)^='0') THEN DO;
               INDEX = I;
               I=5;
            END;
         END;
         NNN = SUBSTR(NN,INDEX,5-INDEX+1);
         CALL SYMPUT('NOBSERV',NNN);
      END;

****  DETERMINE AND THROW OUT OUTLIERS IF REQUESTED  ****;
%MACRO STD_DEV;
  /* Generates DATA step code that sets STDEV to the sample standard     */
  /* deviation of OBS(START) through OBS(END).                           */
  /*   Reads : array OBS, variables START and END.                       */
  /*   Writes: I, SM, SM_SQ, SQ_SM, S2, STDEV.                           */
  /* At least two values are needed.  When END is not greater than START */
  /* nothing is computed and STDEV keeps its previous value, which       */
  /* avoids a division by zero.                                          */
  IF (END > START) THEN DO;
    SM = 0;  SM_SQ = 0;
    DO I=START TO END;
      SM = SM + OBS(I);
      SM_SQ = SM_SQ + (OBS(I)**2);
    END;
    SQ_SM = SM**2 / (END - START + 1);
    S2 = (SM_SQ - SQ_SM) / (END - START);
    /* Rounding can leave a tiny negative variance when all values are   */
    /* equal; clamp it so SQRT does not fail and return a missing STDEV. */
    IF (. < S2 < 0) THEN S2 = 0;
    STDEV = SQRT(S2);
  END;
%MEND STD_DEV;

%MACRO STD_CHK;
    /* Generates DATA step code that flags extreme values in array OBS.  */
    /* OBS holds &NOBSERV values and OUTL holds ten one-character flags: */
    /* OUTL(1)-OUTL(5) belong to OBS(1)-OBS(5), and OUTL(10) down to     */
    /* OUTL(6) belong to the last five elements of OBS, last one first.  */
    /* A gap between two neighbouring values that exceeds the current    */
    /* STDEV flags every value from that gap out to the end of the tail, */
    /* after which STDEV is recomputed without the flagged values.       */
    /* Uses work variables _K and _J in addition to those of STD_DEV.    */
    START = 1;
    END = &NOBSERV;
    %STD_DEV;

    /* Front of the array: gaps OBS(1)-OBS(2) through OBS(5)-OBS(6). */
    DO _K = 1 TO 5;
      IF (&NOBSERV>=_K+1  &  ABS(OBS(_K) - OBS(_K+1)) > STDEV) THEN DO;
        DO _J = 1 TO _K;
          OUTL(_J) = '*';
        END;
        START = _K + 1;
        %STD_DEV;
      END;
    END;

    /* Back of the array: gaps counted inward from the last element. */
    DO _K = 1 TO 5;
      IF (&NOBSERV>=_K+6  &
                 ABS(OBS(&NOBSERV-_K+1) - OBS(&NOBSERV-_K)) > STDEV) THEN DO;
        DO _J = 1 TO _K;
          OUTL(11-_J) = '*';
        END;
        END = &NOBSERV - _K;
        /* Recompute only while at least two values remain. */
        IF (END > START)  THEN DO;
          %STD_DEV;
        END;
      END;
    END;
%MEND STD_CHK;

%MACRO OUTLR(L);
  /* Flag extreme values of variable L (X or Y) in _DSN.                 */
  /*   1. Sort _DSN by descending L.                                     */
  /*   2. Transpose ID into one row (_TRANSID), keeping the IDs of the   */
  /*      first five and the last rows.                                  */
  /*   3. Transpose L into one row (_TRANS) and run STD_CHK on it,       */
  /*      giving _OUTL<L> with the values and their flags.               */
  /*   4. Combine both rows into _MNMX<L>.                               */
  /* N4 is the position of the first kept value of the lower tail: 6     */
  /* when there are at most 10 observations, otherwise NOBSERV-4.  It is */
  /* computed once, after the first DATA statement, and reused below.    */
  PROC SORT DATA=_DSN;
    BY DESCENDING &L;

  PROC TRANSPOSE DATA=_DSN OUT=_TRANSID PREFIX=ID&L._;
    VAR ID;

  DATA _TRANSID;
    SET _TRANSID;
    %IF (&NOBSERV > 10)  %THEN %LET N4 = %EVAL(&NOBSERV-4);
    %ELSE %LET N4 = 6;
    KEEP ID&L._1-ID&L._5 ID&L._&N4-ID&L._&NOBSERV;

  PROC TRANSPOSE DATA=_DSN OUT=_TRANS PREFIX=&L._;
    VAR &L.;

  DATA _OUTL&L;
    SET _TRANS;
    LENGTH  OUTL&L._1-OUTL&L._10  $1.;
    ARRAY OBS(*)  &L._1-&L._&NOBSERV;
    ARRAY OUTL(10)  OUTL&L._1-OUTL&L._10;
    KEEP &L._1-&L._5  &L._&N4-&L._&NOBSERV  OUTL&L._1-OUTL&L._10;
    %STD_CHK;

  /* One row holding the extreme IDs, values and flags together. */
  DATA _MNMX&L;
    IF _N_=1 THEN SET _TRANSID;
    SET _OUTL&L;
%MEND OUTLR;

/* Flag the extreme X values, then the extreme Y values. */
%OUTLR(X);
%OUTLR(Y);

/* Attach the single row of extreme values and flags to every row of     */
/* _DSN.  When OPT is OBRIEN, delete each row whose X or Y equals a      */
/* flagged extreme value.  Flags 1-5 belong to positions 1-5; flags 6-10 */
/* belong to positions NOBSERV-4 through NOBSERV.                        */
DATA _T_DAT;
  IF _N_=1 THEN DO;
    MERGE _MNMXX _MNMXY;
  END;
  SET _DSN;
  KEEP ID X Y WEIGH GROUP X2-X5 G1-G&NUM;
  %LET N4 = %EVAL(&NOBSERV-4);
  %LET N3 = %EVAL(&NOBSERV-3);
  %LET N2 = %EVAL(&NOBSERV-2);
  %LET N1 = %EVAL(&NOBSERV-1);
  %IF (&OPT = OBRIEN) %THEN %DO;
      /* Row matches a flagged X value. */
      IF ((X=X_1 & OUTLX_1='*')   | (X=X_2 & OUTLX_2='*')   |
          (X=X_3 & OUTLX_3='*')   | (X=X_4 & OUTLX_4='*')   |
          (X=X_5 & OUTLX_5='*')   | (X=X_&N4 & OUTLX_6='*') |
          (X=X_&N3 & OUTLX_7='*') | (X=X_&N2 & OUTLX_8='*') |
          (X=X_&N1 & OUTLX_9='*') | (X=X_&NOBSERV & OUTLX_10='*'))
          THEN DELETE;
      /* Row matches a flagged Y value. */
      IF ((Y=Y_1 & OUTLY_1='*')   | (Y=Y_2 & OUTLY_2='*')   |
          (Y=Y_3 & OUTLY_3='*')   | (Y=Y_4 & OUTLY_4='*')   |
          (Y=Y_5 & OUTLY_5='*')   | (Y=Y_&N4 & OUTLY_6='*') |
          (Y=Y_&N3 & OUTLY_7='*') | (Y=Y_&N2 & OUTLY_8='*') |
          (Y=Y_&N1 & OUTLY_9='*') | (Y=Y_&NOBSERV & OUTLY_10='*'))
          THEN DELETE;
  %END;

 /* Build _GRPS, the list of group labels: one row per group present in  */
 /* _T_DAT, blank rows up to row 10, and a final TOTAL row.  NUM is      */
 /* reset to the number of groups actually present.                      */
 PROC SORT DATA=_T_DAT;
    BY WEIGH GROUP;

 DATA _GRPS;
    SET _T_DAT END=EOF;
    BY WEIGH GROUP;
    KEEP _GRPS;
    IF FIRST.GROUP THEN DO;
        _GRPS=GROUP;
        /* Sum statement: G_NUM starts at 0 and is retained. */
        G_NUM + 1;
        OUTPUT;
    END;
    IF EOF THEN DO;
         /* Two-digit zero-padded count with any leading zero removed. */
         NN = PUT(G_NUM,Z2.);
         INDEX = VERIFY(NN,'0');
         NNN = SUBSTR(NN,INDEX,2-INDEX+1);
         CALL SYMPUT('NUM',NNN);
    END;

 DATA _GRPS;
    SET _GRPS END=EOF;
    KEEP _GRPS;
    OUTPUT;
    IF EOF THEN DO;
       /* Pad with blank labels through row 10, then append TOTAL. */
       _GRPS="            ";
       DO CT=_N_+1 TO 10;
          OUTPUT;
       END;
       _GRPS='  TOTAL';
       OUTPUT;
    END;

/* Copy _T_DAT back to _DSN and, on the last row, publish two macro     */
/* variables:                                                           */
/*   _OUTS    previous NOBSERV minus the current row count              */
/*   NOBSERV  the current row count without leading zeros              */
/* The reference to NOBSERV in the OUTS assignment is resolved when the */
/* statement is generated, so it carries the value from before this     */
/* step.  There is no KEEP or DROP, so OUTS, OUTSPUT, NN, I, INDEX and  */
/* NNN are written to _DSN as well.                                     */
DATA _DSN; SET _T_DAT END=EOF;
  IF (EOF)  THEN DO;
    OUTS = &NOBSERV - _N_;
    OUTSPUT = PUT(OUTS,3.);
    CALL SYMPUT('_OUTS',OUTSPUT);
    /* Zero-padded count; locate the first non-zero digit. */
    NN = PUT(_N_,Z5.);
    DO I=1 TO 5;
      IF (SUBSTR(NN,I,1) ^= '0') THEN DO;
        INDEX = I;
        I = 5;
      END;
    END;
    NNN = SUBSTR(NN,INDEX,5-INDEX+1);
    CALL SYMPUT('NOBSERV',NNN);
  END;
  /* KK is one less than NUM, the group count set in the _GRPS step. */
  %LET KK = %EVAL(&NUM-1);

****  CALCULATE GROUP & TOTAL STATS  ****;
/* _GRPSTAT: per-group mean of Y and X, standard deviation of Y, and N. */
/* _TOTSTAT: overall mean, standard deviation and corrected sum of      */
/*           squares of Y and X, and N.                                 */
/* MEANX   : macro variable holding the overall mean of X.              */
PROC SORT DATA=_DSN; BY GROUP WEIGH;

PROC MEANS NOPRINT DATA=_DSN;
  BY GROUP WEIGH;
  VAR Y X;
  OUTPUT OUT=_GRPSTAT MEAN=MN_Y MN_X
                      STD=STD_Y
                      N=N;

/* Reorder the group statistics by WEIGH, then GROUP. */
PROC SORT DATA=_GRPSTAT; BY WEIGH GROUP;

PROC MEANS  NOPRINT DATA=_DSN;
  VAR Y X;
  OUTPUT OUT=_TOTSTAT(KEEP=MN_Y MN_X STD_Y STD_X CSS_Y CSS_X N)
                      MEAN=MN_Y MN_X
                      STD=STD_Y STD_X
                      CSS=CSS_Y CSS_X
                      N=N;

/* NOTE(review): MN_X is numeric and is passed to CALL SYMPUT directly; */
/* confirm that the implicit conversion to text keeps enough precision. */
DATA _NULL_;  SET _TOTSTAT;
  CALL SYMPUT('MEANX', MN_X);

/* Stack the per-group statistics and the overall statistics into       */
/* _MEANS: the group rows first, then rows with a blank GROUP and       */
/* missing statistics through row 10, then one row labelled TOTAL that  */
/* carries the overall statistics.                                      */
DATA _MEANS;
  SET _GRPSTAT(IN=INGP) _TOTSTAT(IN=INSMN);
  ARRAY _ST(7) N MN_Y MN_X STD_Y STD_X CSS_Y CSS_X;
  ARRAY _SV(7) SAVE1-SAVE7;
  DROP CT SAVE1-SAVE7 _K;
  IF INGP THEN OUTPUT;
  IF INSMN  THEN DO;
    /* Park the overall statistics and blank them for the filler rows. */
    DO _K=1 TO 7;
      _SV(_K) = _ST(_K);
      _ST(_K) = .;
    END;
    GROUP='       ';
    DO CT=_N_ TO 10;
       OUTPUT;
    END;
    /* Restore the statistics for the TOTAL row. */
    GROUP='  TOTAL';
    DO _K=1 TO 7;
      _ST(_K) = _SV(_K);
    END;
    OUTPUT;
  END;

/* Save an untouched copy of _DSN as _DSNSAV, then replace X in _DSN by */
/* its deviation from the overall mean (MEANX) and rebuild the powers   */
/* X2-X5 from the centred value.                                        */
DATA _DSNSAV _DSN;
  SET _DSN;
  OUTPUT _DSNSAV;
  X = X - &MEANX;
  X2 = X * X;
  X3 = X2 * X;
  X4 = X3 * X;
  X5 = X4 * X;
  OUTPUT _DSN;

****  FIT POLYNOMIAL MODELS - OUTPUT COEFF & RESIDUALS ****;
/* Fits six models of Y: intercept only (MEAN) and polynomials in X of  */
/* degree 1 through 5.  Residuals of model k are written to _RES_k as   */
/* variable R_k.  Parameter estimates of the five polynomial models are */
/* written to _OUTREGR; the MEAN model is fitted in a separate PROC REG */
/* call that has no OUTEST= data set.                                   */
/* The data are sorted by X Y ID first.                                 */
PROC SORT DATA=_DSN; BY X Y ID;
PROC REG  DATA=_DSN NOPRINT;
  MEAN:      MODEL Y=;  OUTPUT OUT=_RES_0 R=R_0;
PROC REG  DATA=_DSN NOPRINT OUTEST=_OUTREGR;
  LINEAR:    MODEL Y=X;  OUTPUT OUT=_RES_1 R=R_1;
  QUADRATC:  MODEL Y=X X2;  OUTPUT OUT=_RES_2 R=R_2;
  CUBIC:     MODEL Y=X X2 X3;  OUTPUT OUT=_RES_3 R=R_3;
  QUARTIC:   MODEL Y=X X2 X3 X4;  OUTPUT OUT=_RES_4 R=R_4;
  QUINTIC:   MODEL Y=X X2 X3 X4 X5;  OUTPUT OUT=_RES_5 R=R_5;


/* Prepare the data for the analysis of variance on Y and the modified  */
/* Levene test.                                                         */
/*   _LEV   : the six residual data sets merged by X Y ID.              */
/*   R_STAT : median, mean and standard deviation of each residual.     */
/*   LEV_k  : absolute deviation of residual R_k from its median.       */
/*   Z_k    : PROBNORM of the standardised residual, filled in only     */
/*            when NOBSERV is greater than 50.                          */
DATA _LEV;
  MERGE  _RES_0 _RES_1 _RES_2 _RES_3 _RES_4 _RES_5;
  BY X Y ID;

PROC UNIVARIATE  NOPRINT DATA=_LEV;
  VAR  R_0  R_1  R_2  R_3  R_4  R_5;
  OUTPUT OUT=R_STAT
         MEDIAN = LMD_0  LMD_1  LMD_2  LMD_3  LMD_4  LMD_5
         MEAN   = MNR_0  MNR_1  MNR_2  MNR_3  MNR_4  MNR_5
         STD    = STDR_0 STDR_1 STDR_2 STDR_3 STDR_4 STDR_5;

DATA _LEV;
  IF _N_=1  THEN SET R_STAT;
  SET _LEV;
  ARRAY _RS(6) R_0-R_5;
  ARRAY _MD(6) LMD_0-LMD_5;
  ARRAY _MN(6) MNR_0-MNR_5;
  ARRAY _SD(6) STDR_0-STDR_5;
  ARRAY _LV(6) LEV_0-LEV_5;
  ARRAY _ZZ(6) Z_0-Z_5;
  DROP _K;
  DO _K=1 TO 6;
    _LV(_K) = ABS(_RS(_K) - _MD(_K));
  END;
  IF (&NOBSERV>50)  THEN DO;
    DO _K=1 TO 6;
      _ZZ(_K) = PROBNORM((_RS(_K) - _MN(_K))/_SD(_K));
    END;
  END;

%MACRO GG;
  /* Generates the variable list G1 G2 ... G&KK, the group indicators   */
  /* used as regressors.  The loop index I is not declared local here.  */
  %DO I=1 %TO &KK;
    G&I
  %END;
%MEND GG;

/* Regress Y and each LEV_k on the first KK group indicators, then turn */
/* every fit into an F statistic and p-value with NUM-1 and NOBSERV-NUM */
/* degrees of freedom.  The first row of _OUTRSQ (the Y model) goes to  */
/* _AONY and all remaining rows (the LEV_k models) go to _LEV_P.        */
%LOCAL _M;
PROC RSQUARE  NOPRINT DATA=_LEV  OUTEST=_OUTRSQ SSE MSE;
        MODEL Y = %GG     / INCLUDE=&KK;
        %DO _M=0 %TO 5;
        MODEL LEV_&_M = %GG / INCLUDE=&KK;
        %END;

DATA _AONY _LEV_P;
  SET _OUTRSQ;
  /* Regression sum of squares recovered from R-square and SSE. */
  SSR = _RSQ_ * _SSE_ / (1 - _RSQ_);
  MSB = SSR / (&NUM-1);
  F = MSB / _MSE_;
  P = 1 - PROBF(F,&NUM-1,&NOBSERV-&NUM);
  IF _N_=1  THEN OUTPUT _AONY;
  ELSE OUTPUT _LEV_P;

****  NORMALITY  ****;
/* Four lookup tables, each a list of 48 values separated by slashes.   */
/* NOTE(review): these look like critical values of the W normality     */
/* statistic for sample sizes 3 through 50 at four significance levels, */
/* with W_P_VAL1 the most extreme; confirm against the code that reads  */
/* them.                                                                */
%LET W_P_VAL1 = .753/.687/.686/ .713/.730/.749/.764/.781/
      .792/.805/.814/.825/.835/ .844/.851/.858/.863/.868/
      .873/.878/.881/.884/.888/ .891/.894/.896/.898/.900/
      .902/.904/.906/.908/.910/ .912/.914/.916/.917/.919/
      .920/.922/.923/.924/.926/ .927/.928/.929/.929/.930;
%LET W_P_VAL2 = .756/.707/.715/ .743/.760/.778/.791/.806/
      .817/.828/.837/.846/.855/ .863/.869/.874/.879/.884/
      .888/.892/.895/.898/.901/ .904/.906/.908/.910/.912/
      .914/.915/.917/.919/.920/ .922/.924/.925/.927/.928/
      .929/.930/.932/.933/.934/ .935/.936/.937/.937/.938;
%LET W_P_VAL3 = .767/.748/.762/ .788/.803/.818/.829/.842/
      .850/.859/.866/.874/.881/ .887/.892/.897/.901/.905/
      .908/.911/.914/.916/.918/ .920/.923/.924/.926/.927/
      .929/.930/.931/.933/.934/ .935/.936/.938/.939/.940/
      .941/.942/.943/.944/.945/ .945/.946/.947/.947/.947;
%LET W_P_VAL4 = .789/.792/.806/ .826/.838/.851/.859/.869/
      .876/.883/.889/.895/.901/ .906/.910/.914/.917/.920/
      .923/.926/.928/.930/.931/ .933/.935/.936/.937/.939/
      .940/.941/.942/.943/.944/ .945/.946/.947/.948/.949/
      .950/.951/.951/.952/.953/ .953/.954/.954/.955/.955;

%MACRO NORM;
  %IF (&NOBSERV<=50)  %THEN %DO;
      PROC UNIVARIATE  NOPRINT DATA=_LEV;
        VAR R_0 R_1 R_2 R_3 R_4 R_5;
        OUTPUT OUT=_NORML  NORMAL=W_0 W_1 W_2 W_3 W_4 W_5;

      %MACRO PN;
        %DO I=0 %TO 5;
          PNORM_&I = '      P>.10';
          IF (W_&I <= &W4) THEN PNORM_&I = ' .0550)  %THEN %DO;
    %DO I=0 %TO 5;
      PROC SORT DATA=_LEV;  BY Z_&I;
      PROC TRANSPOSE DATA=_LEV OUT=_ZTRAN(DROP=_NAME_) PREFIX=Z_;
        VAR Z_&I.;

      %MACRO A2_CALC;
        ARRAY Z(&NOBSERV)  Z_1 - Z_&NOBSERV;
        A2_&I. = 0;
        DO J=1 TO &NOBSERV;
          A2_&I = A2_&I +
                (2*J-1) * (LOG(Z(J)) + LOG(1 - Z(&NOBSERV-J+1)));
        END;
        A2_&I = (-1 * A2_&I / &NOBSERV) - &NOBSERV;
        A2_&I = A2_&I * (1 + .75/&NOBSERV + 2.25/(&NOBSERV**2));
        IF (A2_&I >= 1.035) THEN PNORM_&I = '     P<=.01';
        IF (A2_&I < 1.035)  THEN PNORM_&I = '.011 THEN DO;
            PUT #PN @52 _GRPS $CHAR12. @65 N 6. @73 MN_X 10.2 @85 MN_Y
                10.2 @97 STD_Y 10.2 ;
            PN=PN+1;
         END;
      END;
   PUT #20 @1 130*"=";
   PUT @1 "| POLYNOMIAL REGRESSION ANALYSIS |"/
       @2 "--------------------------------" @117 "P-VALUE" /
       @43 "COEFFICIENTS" @87"ERROR" @112 "MODEL(I)  MODEL(I)"/
       @16 65*"-" @83 14*"-" @104 "2" @114 "VS        VS"/
       @2 "MODEL(I)" @24 "B0" @35 "B1" @46 "B2" @57 "B3" @68 "B4"
       @79 "B5" @84 "DF"  @92 "RMSE" @103 "R"
       @110 "MODEL(I-1)  MODEL(0)"/
       @2 130*"-";
      SET MEAN_PGA;
       PUT @2 "MEAN(0)" @16 MN_Y 10.3 @82 EDF_MN 4. @88 RMSE_MN 8.2;
   DO I=1 TO 5;
      SET _PGANAL(drop=_type_);
      IF _MODEL_="LINEAR" THEN _MODEL="LINEAR(1)    ";
         ELSE IF _MODEL_="QUADRATC" THEN _MODEL="QUADRATIC(2)";
         ELSE IF _MODEL_="CUBIC" THEN _MODEL="CUBIC(3)";
         ELSE IF _MODEL_="QUARTIC" THEN _MODEL="QUARTIC(4)";
         ELSE IF _MODEL_="QUINTIC" THEN _MODEL="QUINTIC(5)";
      PUT
       @2 _MODEL  @16 B0 10.3 @27 B1 10.4  @38 B2 10.4 @49 B3 10.5
       @60 B4 10.5 @71 B5 10.5 @82 _EDF_ 4. +2 _RMSE_  8.2 +2
       _RSQ_  8.3  @112 P_I_1 8.3  +2 P_0 8.3;
   END;
   PUT @1 130*"=";
   PUT @1  "| TEST OF MODELLING ASSUMPTIONS USING RESIDUALS "
       @49 "FROM THE FOLLOWING MODELS : |";
   PUT @2 75*"-" /
       @39 "MEAN        LINEAR     QUADRATIC         CUBIC       "
       @92 "QUARTIC       QUINTIC";
   SET _ZP;
   PUT @3 "SKEWNESS : COEFFICIENT" @35 SKW_R0 8.2 +6 SKW_R1 8.2
       +6 SKW_R2 8.2 +6 SKW_R3 8.2 +6 SKW_R4 8.2  +6 SKW_R5 8.2 /
       @18 "Z" @35 Z_SKW_R0 8.2 +6 Z_SKW_R1 8.2 +6 Z_SKW_R2 8.2
       +6 Z_SKW_R3 8.2 +6 Z_SKW_R4 8.2 +6 Z_SKW_R5 8.2 //
       @3 "KURTOSIS : COEFFICIENT" @35 KRT_R0 8.2 +6 KRT_R1 8.2
       +6 KRT_R2 8.2 +6 KRT_R3 8.2 +6 KRT_R4 8.2  +6 KRT_R5 8.2 /
       @18 "Z" @35 Z_KRT_R0 8.2 +6 Z_KRT_R1 8.2 +6 Z_KRT_R2 8.2
       +6 Z_KRT_R3 8.2 +6 Z_KRT_R4 8.2 +6 Z_KRT_R5 8.2 /;
   SET _NORML;
   PUT @3 "NORMALITY :    P      " @32 PNORM_0 $CHAR11. +3
       PNORM_1 $CHAR11.  +3 PNORM_2 $CHAR11.  +3 PNORM_3 $CHAR11.
       +3 PNORM_4 $CHAR11.  +3 PNORM_5 $CHAR11. //
       @3 "COMMON VARIANCE :" ;
     SET _P_SPEAR;
       PUT @4 "P VALUE FROM SPEARMANS R" @35 P_R0 8.3 +6 P_R1 8.3
           +6 P_R2 8.3 +6 P_R3 8.3 +6 P_R4 8.3 +6 P_R5 8.3/
           @6 "USING ABSOLUTE RESIDUALS" /;
     SET __LEV_P;
       PUT @4 "P VALUE USING MODIFIED" @35 LEVP0 8.3 +6 LEVP1 8.3
           +6 LEVP2 8.3 +6 LEVP3 8.3 +6 LEVP4 8.3 +6 LEVP5 8.3 /
           @6 "LEVENE TEST";
    STOP;

  /* Page 2: plot Y against X with the group means overlaid as "*".     */
  /* _ME holds the group rows of _MEANS; the blank filler rows and the  */
  /* TOTAL row are removed.  _PLOT merges _DSNSAV and _ME without a BY  */
  /* statement.  The plot axes are labelled with the names passed in    */
  /* XVAR and YVAR.                                                     */
  DATA _ME; SET _MEANS;
    IF GROUP='     ' | GROUP='  TOTAL' THEN DELETE;

  DATA _PLOT; MERGE _DSNSAV _ME;
  PROC PLOT DATA=_PLOT; PLOT Y*X MN_Y*MN_X="*"/OVERLAY;
    LABEL Y=%UNQUOTE(%QUOTE(%'&YVAR%'))
          X=%UNQUOTE(%QUOTE(%'&XVAR%'));

  /* Closes the %DO opened by the test of QUIT against 6. */
  %END;
  %ELSE %DO;
    /* NOTE(review): this PUT is a DATA step statement generated inside */
    /* the step that was opened just before the QUIT test; confirm that */
    /* is the intended way to report the condition.                     */
    PUT 'INSUFFICIENT NON-MISSING OBSERVATIONS (N<6)';
  %END;
  /* Common exit point for all validation failures. */
  %ENDUP:
  OPTIONS NODQUOTE;
%MEND REGRESS;